library(data.table)
library(tidyverse)
library(rmarkdown)
library(ggplot2)
library(pheatmap)
library(AnnotationHub)
library(tximport)
library(Rsubread)
library(DESeq2)
library(UpSetR)
AnnotationSpecies <- "Homo sapiens" # Assign your species
ah <- AnnotationHub(hub=getAnnotationHubOption("URL")) # Bring annotation DB
ahQuery <- query(ah, c("OrgDb", AnnotationSpecies)) # Filter annotation of interest
if (length(ahQuery) == 1) {
DBName <- names(ahQuery)
} else if (length(ahQuery) > 1) {
DBName <- names(ahQuery)[1]
} else {
print("You don't have a valid DB")
rmarkdown::render()
}
AnnoDb <- ah[[DBName]] # Store into an OrgDb object
# Explore your OrgDb object with following accessors:
# columns(AnnpDb)
# keytypes(AnnoDb)
# keys(AnnoDb, keytype=..)
# select(AnnoDb, keys=.., columns=.., keytype=...)
AnnoKey <- keys(AnnoDb, keytype="ENSEMBLTRANS")
# Note: Annotation has to be done with not genome but transcripts
AnnoDb <- select(AnnoDb,
AnnoKey,
keytype="ENSEMBLTRANS",
columns="ENSEMBL")
# Check if your AnnoDb has been extracted and saved correctely
class(AnnoDb)
## [1] "data.frame"
head(AnnoDb) # The column 1 has to assign transcript (e.g. ENSEMBLTRANS)
## ENSEMBLTRANS ENSEMBL
## 1 ENST00000543404 ENSG00000256069
## 2 ENST00000566278 ENSG00000256069
## 3 ENST00000545343 ENSG00000256069
## 4 ENST00000544183 ENSG00000256069
## 5 ENST00000286479 ENSG00000156006
## 6 ENST00000520116 ENSG00000156006
# This code chunk needs to be written by yourself
# Define sample names
SampleNames <- c(paste0("Mock-rep", 1:3), paste0("SARS-CoV-2-rep", 1:3))
# Aligner names
Aligners <- c("Salmon", "STAR", "HISAT2")
# Define group level
GroupLevel <- c("Mock", "Covid")
# Define contrast for DE analysis
Contrast <- c("Group", GroupLevel)
# Set a function for file paths
path.fn <- function(head, tail) {
vec <- c(paste0(head, # head = e.g. "hisat2", "star", or "salmon"
"_output/",
SampleNames,
tail)) # tail = file name after SampleNames
return(vec)
}
# Define .sf file path
sf <- path.fn("salmon",
".salmon_quant/quant.sf")
# Define STAR file path
star <- path.fn("star",
"Aligned.sortedByCoord.out.bam")
# Define HISAT2 file path
hisat <- path.fn("hisat2",
".sorted.bam")
# Define sample groups
group <- c(rep("Mock", 3), rep("Covid", 3))
# Create metadata
metadata <- data.frame(Sample=factor(SampleNames, levels=SampleNames),
Group=factor(group, levels=GroupLevel),
Salmon_path=sf,
STAR_path=star,
HISAT2_path=hisat)
# Assign row names with sample names
rownames(metadata) <- SampleNames
# Explore the metadata
print(metadata)
## Sample Group
## Mock-rep1 Mock-rep1 Mock
## Mock-rep2 Mock-rep2 Mock
## Mock-rep3 Mock-rep3 Mock
## SARS-CoV-2-rep1 SARS-CoV-2-rep1 Covid
## SARS-CoV-2-rep2 SARS-CoV-2-rep2 Covid
## SARS-CoV-2-rep3 SARS-CoV-2-rep3 Covid
## Salmon_path
## Mock-rep1 salmon_output/Mock-rep1.salmon_quant/quant.sf
## Mock-rep2 salmon_output/Mock-rep2.salmon_quant/quant.sf
## Mock-rep3 salmon_output/Mock-rep3.salmon_quant/quant.sf
## SARS-CoV-2-rep1 salmon_output/SARS-CoV-2-rep1.salmon_quant/quant.sf
## SARS-CoV-2-rep2 salmon_output/SARS-CoV-2-rep2.salmon_quant/quant.sf
## SARS-CoV-2-rep3 salmon_output/SARS-CoV-2-rep3.salmon_quant/quant.sf
## STAR_path
## Mock-rep1 star_output/Mock-rep1Aligned.sortedByCoord.out.bam
## Mock-rep2 star_output/Mock-rep2Aligned.sortedByCoord.out.bam
## Mock-rep3 star_output/Mock-rep3Aligned.sortedByCoord.out.bam
## SARS-CoV-2-rep1 star_output/SARS-CoV-2-rep1Aligned.sortedByCoord.out.bam
## SARS-CoV-2-rep2 star_output/SARS-CoV-2-rep2Aligned.sortedByCoord.out.bam
## SARS-CoV-2-rep3 star_output/SARS-CoV-2-rep3Aligned.sortedByCoord.out.bam
## HISAT2_path
## Mock-rep1 hisat2_output/Mock-rep1.sorted.bam
## Mock-rep2 hisat2_output/Mock-rep2.sorted.bam
## Mock-rep3 hisat2_output/Mock-rep3.sorted.bam
## SARS-CoV-2-rep1 hisat2_output/SARS-CoV-2-rep1.sorted.bam
## SARS-CoV-2-rep2 hisat2_output/SARS-CoV-2-rep2.sorted.bam
## SARS-CoV-2-rep3 hisat2_output/SARS-CoV-2-rep3.sorted.bam
# "mm10", "mm9", "hg38", or "hg19"
annot.inbuilt <- "hg38"
# GTF file path
annot.ext <- "../SEQC/reference_GENCODE/gencode.v36.primary_assembly.annotation.gtf"
# annotation type:
# e.g.: "gene_id", "transcript_id", or "gene_name"
GTF.attrType <- "gene_id"
# Number of cores
nthread <- 16
# Set a function importing counts from BAM files with featureCounts()
fcounts.fn <- function(vec) {
fc <- featureCounts(files=vec, # a vector assigning BAM file paths
annot.inbuilt=annot.inbuilt,
annot.ext=annot.ext,
GTF.attrType=GTF.attrType,
isGTFAnnotationFile=T,
nthread=nthread,
isPairedEnd=F, # Set this parameter correctly
verbose=T)
return(fc$counts)
}
# Import gene level summarized counts
salmon.txi <- tximport(metadata$Salmon_path,
type = "salmon",
tx2gene=AnnoDb,
ignoreTxVersion=T,
txOut=F) # TRUE for transcript level, FALSE for gene level
# Extract the counts and save as a data frame
salmon.counts <- salmon.txi$counts
# Explore the salmon count data frame
head(salmon.counts)
## [,1] [,2] [,3] [,4] [,5] [,6]
## ENSG00000000005 7.000 4.000 0.000 2.000 1.000 4.000
## ENSG00000001561 2889.000 2782.000 2515.000 2498.000 1102.000 2073.000
## ENSG00000002933 17.000 32.000 18.000 25.000 14.000 24.000
## ENSG00000003056 1423.407 1538.976 1321.634 2218.945 1274.869 1318.894
## ENSG00000003137 7.000 1.000 4.000 2.000 0.000 4.000
## ENSG00000004478 277.049 305.781 224.858 378.571 237.257 310.414
dim(salmon.counts)
## [1] 12202 6
summary(salmon.counts)
## V1 V2 V3 V4
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.00 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 0.00 Median : 0.0 Median : 0.0 Median : 1.0
## Mean : 307.78 Mean : 294.9 Mean : 258.4 Mean : 409.6
## 3rd Qu.: 10.82 3rd Qu.: 10.0 3rd Qu.: 8.0 3rd Qu.: 13.0
## Max. :262039.61 Max. :268814.9 Max. :208422.0 Max. :555480.7
## V5 V6
## Min. : 0 Min. : 0.0
## 1st Qu.: 0 1st Qu.: 0.0
## Median : 0 Median : 0.0
## Mean : 228 Mean : 298.4
## 3rd Qu.: 7 3rd Qu.: 9.0
## Max. :242979 Max. :370575.7
# Extract counts by running featureCounts()
star.counts <- fcounts.fn(metadata$STAR_path)
##
## ========== _____ _ _ ____ _____ ______ _____
## ===== / ____| | | | _ \| __ \| ____| /\ | __ \
## ===== | (___ | | | | |_) | |__) | |__ / \ | | | |
## ==== \___ \| | | | _ <| _ /| __| / /\ \ | | | |
## ==== ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
## ========== |_____/ \____/|____/|_| \_\______/_/ \_\_____/
## Rsubread 2.4.0
##
## //========================== featureCounts setting ===========================\\
## || ||
## || Input files : 6 BAM files ||
## || ||
## || Mock-rep1Aligned.sortedByCoord.out.bam ||
## || Mock-rep2Aligned.sortedByCoord.out.bam ||
## || Mock-rep3Aligned.sortedByCoord.out.bam ||
## || SARS-CoV-2-rep1Aligned.sortedByCoord.out.bam ||
## || SARS-CoV-2-rep2Aligned.sortedByCoord.out.bam ||
## || SARS-CoV-2-rep3Aligned.sortedByCoord.out.bam ||
## || ||
## || Paired-end : no ||
## || Count read pairs : no ||
## || Annotation : gencode.v36.primary_assembly.annotation.gtf ... ||
## || Dir for temp files : . ||
## || Threads : 16 ||
## || Level : meta-feature level ||
## || Multimapping reads : counted ||
## || Multi-overlapping reads : not counted ||
## || Min overlapping bases : 1 ||
## || ||
## \\============================================================================//
##
## //================================= Running ==================================\\
## || ||
## || Load annotation file gencode.v36.primary_assembly.annotation.gtf ... ||
## || Features : 1430132 ||
## || Meta-features : 60719 ||
## || Chromosomes/contigs : 47 ||
## || ||
## || Process BAM file Mock-rep1Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 54178083 ||
## || Successfully assigned alignments : 29102650 (53.7%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Process BAM file Mock-rep2Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 52009761 ||
## || Successfully assigned alignments : 28582595 (55.0%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Process BAM file Mock-rep3Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 44486311 ||
## || Successfully assigned alignments : 25165867 (56.6%) ||
## || Running time : 0.05 minutes ||
## || ||
## || Process BAM file SARS-CoV-2-rep1Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 65798929 ||
## || Successfully assigned alignments : 35937122 (54.6%) ||
## || Running time : 0.08 minutes ||
## || ||
## || Process BAM file SARS-CoV-2-rep2Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 34330831 ||
## || Successfully assigned alignments : 19826399 (57.8%) ||
## || Running time : 0.04 minutes ||
## || ||
## || Process BAM file SARS-CoV-2-rep3Aligned.sortedByCoord.out.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 50803344 ||
## || Successfully assigned alignments : 27376989 (53.9%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Write the final count table. ||
## || Write the read assignment summary. ||
## || ||
## \\============================================================================//
# Explore the STAR count data frame
head(star.counts)
## Mock-rep1Aligned.sortedByCoord.out.bam
## ENSG00000223972.5 24
## ENSG00000227232.5 52
## ENSG00000278267.1 0
## ENSG00000243485.5 8
## ENSG00000284332.1 0
## ENSG00000237613.2 327
## Mock-rep2Aligned.sortedByCoord.out.bam
## ENSG00000223972.5 13
## ENSG00000227232.5 46
## ENSG00000278267.1 0
## ENSG00000243485.5 9
## ENSG00000284332.1 0
## ENSG00000237613.2 311
## Mock-rep3Aligned.sortedByCoord.out.bam
## ENSG00000223972.5 17
## ENSG00000227232.5 32
## ENSG00000278267.1 0
## ENSG00000243485.5 7
## ENSG00000284332.1 0
## ENSG00000237613.2 270
## SARS-CoV-2-rep1Aligned.sortedByCoord.out.bam
## ENSG00000223972.5 29
## ENSG00000227232.5 39
## ENSG00000278267.1 0
## ENSG00000243485.5 5
## ENSG00000284332.1 0
## ENSG00000237613.2 101
## SARS-CoV-2-rep2Aligned.sortedByCoord.out.bam
## ENSG00000223972.5 6
## ENSG00000227232.5 26
## ENSG00000278267.1 0
## ENSG00000243485.5 2
## ENSG00000284332.1 0
## ENSG00000237613.2 97
## SARS-CoV-2-rep3Aligned.sortedByCoord.out.bam
## ENSG00000223972.5 16
## ENSG00000227232.5 45
## ENSG00000278267.1 0
## ENSG00000243485.5 10
## ENSG00000284332.1 0
## ENSG00000237613.2 116
dim(star.counts)
## [1] 60719 6
summary(star.counts)
## Mock-rep1Aligned.sortedByCoord.out.bam Mock-rep2Aligned.sortedByCoord.out.bam
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 2.0 Median : 1.0
## Mean : 479.3 Mean : 470.7
## 3rd Qu.: 31.0 3rd Qu.: 29.0
## Max. :2096551.0 Max. :2203523.0
## Mock-rep3Aligned.sortedByCoord.out.bam
## Min. : 0.0
## 1st Qu.: 0.0
## Median : 1.0
## Mean : 414.5
## 3rd Qu.: 23.0
## Max. :1850592.0
## SARS-CoV-2-rep1Aligned.sortedByCoord.out.bam
## Min. : 0.0
## 1st Qu.: 0.0
## Median : 2.0
## Mean : 591.9
## 3rd Qu.: 38.0
## Max. :1464055.0
## SARS-CoV-2-rep2Aligned.sortedByCoord.out.bam
## Min. : 0.0
## 1st Qu.: 0.0
## Median : 1.0
## Mean : 326.5
## 3rd Qu.: 20.0
## Max. :887660.0
## SARS-CoV-2-rep3Aligned.sortedByCoord.out.bam
## Min. : 0.0
## 1st Qu.: 0.0
## Median : 1.0
## Mean : 450.9
## 3rd Qu.: 27.0
## Max. :1410329.0
# Extract counts by running featureCounts()
hisat2.counts <- fcounts.fn(metadata$HISAT2_path)
##
## ========== _____ _ _ ____ _____ ______ _____
## ===== / ____| | | | _ \| __ \| ____| /\ | __ \
## ===== | (___ | | | | |_) | |__) | |__ / \ | | | |
## ==== \___ \| | | | _ <| _ /| __| / /\ \ | | | |
## ==== ____) | |__| | |_) | | \ \| |____ / ____ \| |__| |
## ========== |_____/ \____/|____/|_| \_\______/_/ \_\_____/
## Rsubread 2.4.0
##
## //========================== featureCounts setting ===========================\\
## || ||
## || Input files : 6 BAM files ||
## || ||
## || Mock-rep1.sorted.bam ||
## || Mock-rep2.sorted.bam ||
## || Mock-rep3.sorted.bam ||
## || SARS-CoV-2-rep1.sorted.bam ||
## || SARS-CoV-2-rep2.sorted.bam ||
## || SARS-CoV-2-rep3.sorted.bam ||
## || ||
## || Paired-end : no ||
## || Count read pairs : no ||
## || Annotation : gencode.v36.primary_assembly.annotation.gtf ... ||
## || Dir for temp files : . ||
## || Threads : 16 ||
## || Level : meta-feature level ||
## || Multimapping reads : counted ||
## || Multi-overlapping reads : not counted ||
## || Min overlapping bases : 1 ||
## || ||
## \\============================================================================//
##
## //================================= Running ==================================\\
## || ||
## || Load annotation file gencode.v36.primary_assembly.annotation.gtf ... ||
## || Features : 1430132 ||
## || Meta-features : 60719 ||
## || Chromosomes/contigs : 47 ||
## || ||
## || Process BAM file Mock-rep1.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 54439819 ||
## || Successfully assigned alignments : 27242652 (50.0%) ||
## || Running time : 0.07 minutes ||
## || ||
## || Process BAM file Mock-rep2.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 51357583 ||
## || Successfully assigned alignments : 26791035 (52.2%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Process BAM file Mock-rep3.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 44166286 ||
## || Successfully assigned alignments : 23859049 (54.0%) ||
## || Running time : 0.05 minutes ||
## || ||
## || Process BAM file SARS-CoV-2-rep1.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 68677505 ||
## || Successfully assigned alignments : 33942278 (49.4%) ||
## || Running time : 0.08 minutes ||
## || ||
## || Process BAM file SARS-CoV-2-rep2.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 35316473 ||
## || Successfully assigned alignments : 18758834 (53.1%) ||
## || Running time : 0.04 minutes ||
## || ||
## || Process BAM file SARS-CoV-2-rep3.sorted.bam... ||
## || ||
## || Chromosomes/contigs in input file but not in annotation ||
## || KI270425.1 ||
## || KI270303.1 ||
## || KI270384.1 ||
## || KI270320.1 ||
## || KI270438.1 ||
## || KI270316.1 ||
## || KI270581.1 ||
## || KI270333.1 ||
## || KI270757.1 ||
## || KI270329.1 ||
## || KI270509.1 ||
## || KI270468.1 ||
## || KI270530.1 ||
## || KI270710.1 ||
## || KI270363.1 ||
## || KI270706.1 ||
## || KI270539.1 ||
## || KI270417.1 ||
## || KI270723.1 ||
## || KI270376.1 ||
## || KI270719.1 ||
## || KI270740.1 ||
## || KI270312.1 ||
## || KI270393.1 ||
## || KI270736.1 ||
## || KI270389.1 ||
## || GL000224.1 ||
## || KI270753.1 ||
## || KI270749.1 ||
## || KI270590.1 ||
## || KI270338.1 ||
## || KI270522.1 ||
## || KI270518.1 ||
## || KI270372.1 ||
## || KI270715.1 ||
## || KI270548.1 ||
## || KI270732.1 ||
## || KI270304.1 ||
## || KI270385.1 ||
## || KI270745.1 ||
## || KI270317.1 ||
## || KI270582.1 ||
## || KI270334.1 ||
## || KI270364.1 ||
## || KI270707.1 ||
## || KI270544.1 ||
## || KI270422.1 ||
## || KI270418.1 ||
## || KI270381.1 ||
## || KI270724.1 ||
## || KI270435.1 ||
## || GL000208.1 ||
## || KI270741.1 ||
## || KI270394.1 ||
## || KI270737.1 ||
## || KI270330.1 ||
## || KI270448.1 ||
## || KI270754.1 ||
## || KI270510.1 ||
## || KI270591.1 ||
## || KI270587.1 ||
## || KI270465.1 ||
## || KI270519.1 ||
## || KI270414.1 ||
## || KI270720.1 ||
## || KI270373.1 ||
## || KI270716.1 ||
## || KI270390.1 ||
## || KI270305.1 ||
## || KI270386.1 ||
## || KI270729.1 ||
## || GL000221.1 ||
## || KI270322.1 ||
## || KI270746.1 ||
## || KI270583.1 ||
## || KI270579.1 ||
## || KI270335.1 ||
## || KI270515.1 ||
## || KI270528.1 ||
## || KI270712.1 ||
## || KI270708.1 ||
## || KI270423.1 ||
## || KI270419.1 ||
## || KI270382.1 ||
## || KI270725.1 ||
## || KI270378.1 ||
## || KI270742.1 ||
## || KI270395.1 ||
## || KI270738.1 ||
## || GL000226.1 ||
## || KI270755.1 ||
## || KI270511.1 ||
## || KI270507.1 ||
## || KI270588.1 ||
## || KI270466.1 ||
## || GL000008.2 ||
## || KI270374.1 ||
## || KI270717.1 ||
## || KI270310.1 ||
## || KI270391.1 ||
## || KI270387.1 ||
## || KI270751.1 ||
## || KI270747.1 ||
## || KI270584.1 ||
## || KI270340.1 ||
## || KI270336.1 ||
## || KI270516.1 ||
## || KI270411.1 ||
## || KI270529.1 ||
## || KI270366.1 ||
## || KI270709.1 ||
## || KI270424.1 ||
## || KI270730.1 ||
## || KI270302.1 ||
## || KI270383.1 ||
## || KI270379.1 ||
## || GL000214.1 ||
## || KI270743.1 ||
## || KI270315.1 ||
## || KI270396.1 ||
## || KI270739.1 ||
## || KI270580.1 ||
## || KI270756.1 ||
## || KI270512.1 ||
## || KI270593.1 ||
## || KI270508.1 ||
## || KI270589.1 ||
## || KI270467.1 ||
## || KI270362.1 ||
## || KI270420.1 ||
## || KI270538.1 ||
## || KI270722.1 ||
## || KI270375.1 ||
## || KI270718.1 ||
## || KI270311.1 ||
## || KI270429.1 ||
## || KI270392.1 ||
## || KI270735.1 ||
## || KI270388.1 ||
## || KI270752.1 ||
## || KI270748.1 ||
## || KI270337.1 ||
## || KI270521.1 ||
## || KI270517.1 ||
## || KI270412.1 ||
## || KI270371.1 ||
## || KI270714.1 ||
## || ||
## || Single-end reads are included. ||
## || Total alignments : 52224139 ||
## || Successfully assigned alignments : 26005814 (49.8%) ||
## || Running time : 0.06 minutes ||
## || ||
## || Write the final count table. ||
## || Write the read assignment summary. ||
## || ||
## \\============================================================================//
# Explore the HISAT2 count data frame
head(hisat2.counts)
## Mock-rep1.sorted.bam Mock-rep2.sorted.bam
## ENSG00000223972.5 20 11
## ENSG00000227232.5 46 62
## ENSG00000278267.1 0 0
## ENSG00000243485.5 6 8
## ENSG00000284332.1 0 0
## ENSG00000237613.2 267 256
## Mock-rep3.sorted.bam SARS-CoV-2-rep1.sorted.bam
## ENSG00000223972.5 12 21
## ENSG00000227232.5 36 57
## ENSG00000278267.1 0 0
## ENSG00000243485.5 4 2
## ENSG00000284332.1 0 0
## ENSG00000237613.2 231 87
## SARS-CoV-2-rep2.sorted.bam SARS-CoV-2-rep3.sorted.bam
## ENSG00000223972.5 4 10
## ENSG00000227232.5 35 57
## ENSG00000278267.1 0 0
## ENSG00000243485.5 2 6
## ENSG00000284332.1 0 0
## ENSG00000237613.2 81 89
dim(hisat2.counts)
## [1] 60719 6
summary(hisat2.counts)
## Mock-rep1.sorted.bam Mock-rep2.sorted.bam Mock-rep3.sorted.bam
## Min. : 0.0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0
## Median : 1.0 Median : 1.0 Median : 1.0
## Mean : 448.7 Mean : 441.2 Mean : 392.9
## 3rd Qu.: 27.0 3rd Qu.: 26.0 3rd Qu.: 21.0
## Max. :1876599.0 Max. :2004351.0 Max. :1678512.0
## SARS-CoV-2-rep1.sorted.bam SARS-CoV-2-rep2.sorted.bam
## Min. : 0 Min. : 0.0
## 1st Qu.: 0 1st Qu.: 0.0
## Median : 2 Median : 1.0
## Mean : 559 Mean : 308.9
## 3rd Qu.: 33 3rd Qu.: 17.0
## Max. :1410876 Max. :804577.0
## SARS-CoV-2-rep3.sorted.bam
## Min. : 0.0
## 1st Qu.: 0.0
## Median : 1.0
## Mean : 428.3
## 3rd Qu.: 24.0
## Max. :1328490.0
countList <- list(salmon.counts,
star.counts,
hisat2.counts)
# Assign names of the count data frames in the count list
names(countList) <- Aligners
# Set a function cleaning the count data frame
clean.fn <- function(df) {
# Convert to a data frame
df <- as.data.frame(df)
# Assign column names
names(df) <- SampleNames
# Bring row names to a column
df <- df %>% rownames_to_column(var="GENEID")
return(df)
}
# Set a function to drop GENEID version
clean.annotation.fn <- function(df) {
# Re-annotate without version specification
df <- separate(df, "GENEID", c("GENEID", "Version"))
# Remove version column
df <- df[, colnames(df) != "Version"]
return(df)
}
# Move GENEID to a column
for (x in Aligners) {
countList[[x]] <- clean.fn(countList[[x]])
}
# Remove version of GENEID and duplicated rows in STAR & HISAT2 count tables
for (x in Aligners) {
countList[[x]] <- clean.annotation.fn(countList[[x]]) %>%
distinct()
}
# Explore the cleaned count data frames
head(countList[[1]])
## GENEID Mock-rep1 Mock-rep2 Mock-rep3 SARS-CoV-2-rep1 SARS-CoV-2-rep2
## 1 ENSG00000000005 7.000 4.000 0.000 2.000 1.000
## 2 ENSG00000001561 2889.000 2782.000 2515.000 2498.000 1102.000
## 3 ENSG00000002933 17.000 32.000 18.000 25.000 14.000
## 4 ENSG00000003056 1423.407 1538.976 1321.634 2218.945 1274.869
## 5 ENSG00000003137 7.000 1.000 4.000 2.000 0.000
## 6 ENSG00000004478 277.049 305.781 224.858 378.571 237.257
## SARS-CoV-2-rep3
## 1 4.000
## 2 2073.000
## 3 24.000
## 4 1318.894
## 5 4.000
## 6 310.414
head(countList[[2]])
## GENEID Mock-rep1 Mock-rep2 Mock-rep3 SARS-CoV-2-rep1 SARS-CoV-2-rep2
## 1 ENSG00000223972 24 13 17 29 6
## 2 ENSG00000227232 52 46 32 39 26
## 3 ENSG00000278267 0 0 0 0 0
## 4 ENSG00000243485 8 9 7 5 2
## 5 ENSG00000284332 0 0 0 0 0
## 6 ENSG00000237613 327 311 270 101 97
## SARS-CoV-2-rep3
## 1 16
## 2 45
## 3 0
## 4 10
## 5 0
## 6 116
head(countList[[3]])
## GENEID Mock-rep1 Mock-rep2 Mock-rep3 SARS-CoV-2-rep1 SARS-CoV-2-rep2
## 1 ENSG00000223972 20 11 12 21 4
## 2 ENSG00000227232 46 62 36 57 35
## 3 ENSG00000278267 0 0 0 0 0
## 4 ENSG00000243485 6 8 4 2 2
## 5 ENSG00000284332 0 0 0 0 0
## 6 ENSG00000237613 267 256 231 87 81
## SARS-CoV-2-rep3
## 1 10
## 2 57
## 3 0
## 4 6
## 5 0
## 6 89
dim(countList[[1]])
## [1] 12202 7
dim(countList[[2]])
## [1] 60675 7
dim(countList[[3]])
## [1] 60695 7
sum(duplicated(countList[[1]]))
## [1] 0
sum(duplicated(countList[[2]]))
## [1] 0
sum(duplicated(countList[[3]]))
## [1] 0
# Convert Salmon counts to integers
countList[["Salmon"]] <- cbind(GENEID=countList[["Salmon"]][, "GENEID"],
round(countList[["Salmon"]][,
colnames(countList[["Salmon"]]) %in% SampleNames]))
# Explore the cleaned count data frames
head(countList[[1]])
## GENEID Mock-rep1 Mock-rep2 Mock-rep3 SARS-CoV-2-rep1 SARS-CoV-2-rep2
## 1 ENSG00000000005 7 4 0 2 1
## 2 ENSG00000001561 2889 2782 2515 2498 1102
## 3 ENSG00000002933 17 32 18 25 14
## 4 ENSG00000003056 1423 1539 1322 2219 1275
## 5 ENSG00000003137 7 1 4 2 0
## 6 ENSG00000004478 277 306 225 379 237
## SARS-CoV-2-rep3
## 1 4
## 2 2073
## 3 24
## 4 1319
## 5 4
## 6 310
# Set a function generating a data frame with sequencing depth
seq.depth.fn <- function(df, aligner) {
seqdf <- as.data.frame(colSums(df[, SampleNames])) %>%
rownames_to_column (var="Sample") %>%
mutate(Aligner=aligner)
names(seqdf) <- c("Sample", "Count", "Aligner")
return(seqdf)
}
# Set a function for a bar plot comparing values
comparing.barplot.fn <- function(df, yval, title, ytitle) {
ggplot(df,
aes(x=Sample, y=yval, group=Aligner, fill=Aligner)) +
geom_bar(stat="identity", position="dodge") +
theme_bw() +
ggtitle(title) +
ylab(ytitle)
}
# Initialize the seq depth data frame with the first aligner
seq.depth.df <- seq.depth.fn(countList[[1]], Aligners[1])
# Extend the seq depth data frame with the rest of aligners
for (x in Aligners) {
if (x %in% Aligners[2:length(Aligners)]) {
seq.depth.df <- rbind(seq.depth.df,
seq.depth.fn(countList[[x]], x))
}
}
# Explore how the data frame
print(seq.depth.df)
## Sample Count Aligner
## 1 Mock-rep1 3755460 Salmon
## 2 Mock-rep2 3598534 Salmon
## 3 Mock-rep3 3153053 Salmon
## 4 SARS-CoV-2-rep1 4998240 Salmon
## 5 SARS-CoV-2-rep2 2781926 Salmon
## 6 SARS-CoV-2-rep3 3641081 Salmon
## 7 Mock-rep1 29098819 STAR
## 8 Mock-rep2 28578955 STAR
## 9 Mock-rep3 25163540 STAR
## 10 SARS-CoV-2-rep1 35933153 STAR
## 11 SARS-CoV-2-rep2 19824100 STAR
## 12 SARS-CoV-2-rep3 27373805 STAR
## 13 Mock-rep1 27242642 HISAT2
## 14 Mock-rep2 26791029 HISAT2
## 15 Mock-rep3 23859044 HISAT2
## 16 SARS-CoV-2-rep1 33942259 HISAT2
## 17 SARS-CoV-2-rep2 18758823 HISAT2
## 18 SARS-CoV-2-rep3 26005791 HISAT2
summary(seq.depth.df)
## Sample Count Aligner
## Length:18 Min. : 2781926 Length:18
## Class :character 1st Qu.: 4066155 Class :character
## Mode :character Median :24511292 Mode :character
## Mean :19138903
## 3rd Qu.:27341014
## Max. :35933153
# Convert character vectors to factors
seq.depth.df$Sample <- factor(seq.depth.df$Sample,
levels=SampleNames)
seq.depth.df$Aligner <- factor(seq.depth.df$Aligner,
levels=Aligners)
# Create a plot presenting sequencing depth
comparing.barplot.fn(seq.depth.df,
seq.depth.df$Count,
"Sequencing Depth by Sample and Aligner",
"Count")
# Initialize new lists for storing dds objects
ddsList <- countList
# Initialize new lists for storing vsd objects
vsdList <- countList
for (x in Aligners) {
# Create a count matrix from the count data frame
m <- countList[[x]][, colnames(countList[[x]]) != "GENEID"] %>%
as.matrix()
# Assigne row names
rownames(m) <- countList[[x]]$GENEID
# Generate a DESeq2 object
ddsList[[x]] <- DESeqDataSetFromMatrix(m,
colData=metadata,
design=~Group)
# Conduct vst
vsdList[[x]] <- varianceStabilizingTransformation(ddsList[[x]],
blind=TRUE)
}
# Explore generated objects
summary(ddsList)
## Length Class Mode
## Salmon 12202 DESeqDataSet S4
## STAR 60675 DESeqDataSet S4
## HISAT2 60695 DESeqDataSet S4
summary(vsdList)
## Length Class Mode
## Salmon 12202 DESeqTransform S4
## STAR 60675 DESeqTransform S4
## HISAT2 60695 DESeqTransform S4
# Calculate and add size factors to the DEseq object
for (x in Aligners) {
ddsList[[x]] <- estimateSizeFactors(ddsList[[x]])
}
# Set a function summarizing size factors by aligner to a data frame
sfactor.fn <- function(df, aligner) {
sizefactor <- as.data.frame(round(sizeFactors(df), 3)) %>%
rownames_to_column(var="Sample") %>%
mutate(Aligner=aligner)
names(sizefactor) <- c("Sample", "Size_Factor", "Aligner")
return(sizefactor)
}
# Initialize a data frame with the first aligner
size.factor.df <- sfactor.fn(ddsList[[1]], Aligners[1])
for (x in Aligners) {
if (x != Aligners[1]) {
size.factor.df <- rbind(size.factor.df,
sfactor.fn(ddsList[[x]], x))
}
}
# Explore the data frame
print(size.factor.df)
## Sample Size_Factor Aligner
## 1 Mock-rep1 1.102 Salmon
## 2 Mock-rep2 1.055 Salmon
## 3 Mock-rep3 0.881 Salmon
## 4 SARS-CoV-2-rep1 1.366 Salmon
## 5 SARS-CoV-2-rep2 0.767 Salmon
## 6 SARS-CoV-2-rep3 1.009 Salmon
## 7 Mock-rep1 1.101 STAR
## 8 Mock-rep2 1.056 STAR
## 9 Mock-rep3 0.886 STAR
## 10 SARS-CoV-2-rep1 1.363 STAR
## 11 SARS-CoV-2-rep2 0.747 STAR
## 12 SARS-CoV-2-rep3 1.020 STAR
## 13 Mock-rep1 1.091 HISAT2
## 14 Mock-rep2 1.048 HISAT2
## 15 Mock-rep3 0.894 HISAT2
## 16 SARS-CoV-2-rep1 1.359 HISAT2
## 17 SARS-CoV-2-rep2 0.748 HISAT2
## 18 SARS-CoV-2-rep3 1.025 HISAT2
# Convert character vectors to factors
size.factor.df$Sample <- factor(size.factor.df$Sample,
levels=SampleNames)
size.factor.df$Aligner <- factor(size.factor.df$Aligner,
levels=Aligners)
# Plot calculated size factors
comparing.barplot.fn(size.factor.df,
size.factor.df$Size_Factor,
"Size Factors by Aligner and Sample",
"Size Factor") + geom_hline(yintercept=1, linetype="dashed", color="black", size=1)
for (x in Aligners) {
# Dispersion
ddsList[[x]] <- estimateDispersions(ddsList[[x]])
# Wald test
ddsList[[x]] <- nbinomWaldTest(ddsList[[x]])
}
# Explore generated data in the dds object
ddsList[[1]]
## class: DESeqDataSet
## dim: 12202 6
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(12202): ENSG00000000005 ENSG00000001561 ... ENSG00000288631
## ENSG00000288642
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): Mock-rep1 Mock-rep2 ... SARS-CoV-2-rep2 SARS-CoV-2-rep3
## colData names(6): Sample Group ... HISAT2_path sizeFactor
# Assigne what to compare
GroupOfInterest <- Contrast[1]
# Set a function for sample pca
qcpca.fn <- function(obj, title) {
plotPCA(obj,
intgroup=GroupOfInterest,
returnData=FALSE) + theme_bw() + ggtitle(paste("PCA:", title))
}
# Print the plots
qcpca.fn(vsdList[[1]], Aligners[1])
qcpca.fn(vsdList[[2]], Aligners[2])
qcpca.fn(vsdList[[3]], Aligners[3])
# Heatmap annotation
HeatmapAnno <- metadata[, c("Sample", "Group")]
# Set a function generating a correlation heatmap
cheatmap.fn <- function(df, title) {
# Extract a normalized count matrix
vm <- assay(df)
# Generate a correlation matrix
cm <- cor(vm)
# Generate a heatmap
pheatmap(cm,
annotation=HeatmapAnno,
main=paste("Sample Correlation Heatmap:", title))
}
# Print the heatmaps
cheatmap.fn(vsdList[[1]], Aligners[1])
cheatmap.fn(vsdList[[2]], Aligners[2])
cheatmap.fn(vsdList[[3]], Aligners[3])
# Run DESeq
for (x in Aligners) {
ddsList[[x]] <- DESeq(ddsList[[x]])
# Check result names
ResNames <- resultsNames(ddsList[[x]])
print(ResNames)
}
## [1] "Intercept" "Group_Covid_vs_Mock"
## [1] "Intercept" "Group_Covid_vs_Mock"
## [1] "Intercept" "Group_Covid_vs_Mock"
# Set a function plotting dispersion
dplot.fn <- function(dds, title) {
plotDispEsts(dds,
main=paste("Dispersion over Counts:", title))
}
# Plot dispersion patterns
dplot.fn(ddsList[[1]], Aligners[1])
dplot.fn(ddsList[[2]], Aligners[2])
dplot.fn(ddsList[[3]], Aligners[3])
# Do they fit well with the DESeq2 estimation model?
# Set FDR threshold alpha
alpha=0.1
# Set the coefficients to compare
Coef <- ResNames[-1]
print(Coef)
## [1] "Group_Covid_vs_Mock"
# Set a function to clean a result table
lfctable.fn <- function(df) {
df <- df %>%
rownames_to_column(var="GENEID") %>%
mutate(FDR=ifelse(padj < 0.1 & !is.na(padj),
"< 0.1",
"> 0.1"))
return(df)
}
# Set a function extracting results
extract.lfc.fn <- function(dds) {
res <- results(dds, contrast=Contrast, alpha=alpha)
lfctable.fn(as.data.frame(res))
return(lfctable.fn(as.data.frame(res)))
}
# Initialize a list storing lfc data frames
lfcList <- countList
# Extract DE results
# The Contrast variable was defined in the previous chunk
# Extraction with no shrinkage
# alpha: FDR threshold
for (x in Aligners) {
lfcList[[x]] <- extract.lfc.fn(ddsList[[x]]) %>% mutate(Alignment=x)
print(head(lfcList[[x]]))
}
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1 ENSG00000000005 2.812684 0.6060461 1.5752853 0.3847215 0.7004437685
## 2 ENSG00000001561 2239.154319 0.6071092 0.1375663 4.4132123 0.0000101848
## 3 ENSG00000002933 21.092428 0.1331559 0.5602769 0.2376608 0.8121441958
## 4 ENSG00000003056 1474.374306 -0.1125374 0.1342006 -0.8385756 0.4017075232
## 5 ENSG00000003137 2.877892 1.0930146 1.5681151 0.6970245 0.4857874807
## 6 ENSG00000004478 281.796648 -0.1633303 0.1776798 -0.9192394 0.3579703792
## padj FDR Alignment
## 1 NA > 0.1 Salmon
## 2 0.0002194611 < 0.1 Salmon
## 3 0.9623510132 > 0.1 Salmon
## 4 0.7683747247 > 0.1 Salmon
## 5 NA > 0.1 Salmon
## 6 0.7374787300 > 0.1 Salmon
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1 ENSG00000223972 16.382284 0.2105315 0.5783461 0.3640234 7.158405e-01
## 2 ENSG00000227232 39.073336 0.2487485 0.3713579 0.6698350 5.029630e-01
## 3 ENSG00000278267 0.000000 NA NA NA NA
## 4 ENSG00000243485 6.640203 0.5409279 0.8752427 0.6180319 5.365543e-01
## 5 ENSG00000284332 0.000000 NA NA NA NA
## 6 ENSG00000237613 202.330477 1.5163702 0.2143010 7.0758897 1.484930e-12
## padj FDR Alignment
## 1 8.876896e-01 > 0.1 STAR
## 2 7.609666e-01 > 0.1 STAR
## 3 NA > 0.1 STAR
## 4 NA > 0.1 STAR
## 5 NA > 0.1 STAR
## 6 1.128012e-10 < 0.1 STAR
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1 ENSG00000223972 12.134455 0.42992171 0.6624624 0.64897530 5.163543e-01
## 2 ENSG00000227232 47.652923 -0.02201754 0.3417785 -0.06442051 9.486354e-01
## 3 ENSG00000278267 0.000000 NA NA NA NA
## 4 ENSG00000243485 4.601038 0.83527457 1.0474104 0.79746640 4.251802e-01
## 5 ENSG00000284332 0.000000 NA NA NA NA
## 6 ENSG00000237613 167.762489 1.55049410 0.2207492 7.02378060 2.159438e-12
## padj FDR Alignment
## 1 7.804067e-01 > 0.1 HISAT2
## 2 9.834913e-01 > 0.1 HISAT2
## 3 NA > 0.1 HISAT2
## 4 NA > 0.1 HISAT2
## 5 NA > 0.1 HISAT2
## 6 1.632049e-10 < 0.1 HISAT2
# Initialize a data frame storing total lfc results across the aligners
lfc.dataframe <- lfcList[[1]]
for (x in Aligners[2:length(Aligners)]) {
lfc.dataframe <- rbind(lfc.dataframe,
lfcList[[x]])
}
lfc.dataframe$Alignment <- factor(lfc.dataframe$Alignment,
levels=Aligners)
# Plot distribution of FDR
ggplot(lfc.dataframe,
aes(x=padj, y=..count.., color=Alignment)) +
geom_density(size=1) +
theme_bw() +
scale_x_log10() +
ggtitle("Distribution of False Discovery Rate (FDR) by Aligner") +
ylab("Count") +
xlim(0.00001, 1) +
geom_vline(xintercept=alpha,
color="black",
size=1, linetype="dashed") + scale_x_continuous(breaks=seq(0, 1, by=0.1))
valid.lfc.df <- subset(lfc.dataframe, FDR == "< 0.1")
ggplot(valid.lfc.df,
aes(x=log2FoldChange,
y=..count..,
color=Alignment)) +
geom_density(size=1) +
theme_bw() +
geom_vline(xintercept=c(-1, 1),
linetype="dashed", color="black", size=1) +
ggtitle("Distribution of log2FoldChange Values by Aligner (FDR < 0.1)") +
ylab("Count") +
xlim(-10, 10) # Change xlim by datatype
# Set ylim: has to adjusted by users depending on data
yl <- c(-12, 12)
# Set min log2 fold change of interest
mLog <- c(-1, 1)
# Create MA plots by Aligner
ggplot(lfc.dataframe, aes(x=baseMean, y=log2FoldChange, color=FDR)) +
geom_point() +
facet_grid(~Alignment) +
scale_x_log10() +
theme_bw() +
scale_color_manual(values=c("blue", "grey")) +
ggtitle(paste("MA plot")) +
ylim(yl[1], yl[2]) +
theme(strip.text.x=element_text(size=10)) +
geom_hline(yintercept=c(mLog[1], mLog[2]),
linetype="dashed", color="red")
# Initialize a list
heatmap.df.List <- lfcList
# Filter genes with FDR < alpha and absolute log2FoldChange > 1
for (x in Aligners) {
# Set a logical vector filtering FDR below alpha
is.fdr.valid <- lfcList[[x]]$FDR == paste("<", alpha)
# Set a logical vector filtering absolute lfc above 1
is.lfc.large <- abs(lfcList[[x]]$log2FoldChange) > mLog[2]
# Extract total normalized counts
norm.counts <- counts(ddsList[[x]], normalized=T)
# Save filtered genes only from the normalized count data
heatmap.df.List[[x]] <- norm.counts[is.fdr.valid & is.lfc.large,]
}
# Explore the cleaned data frames
head(heatmap.df.List[[1]])
## Mock-rep1 Mock-rep2 Mock-rep3 SARS-CoV-2-rep1
## ENSG00000034510 5469.6147987 8357.491603 7025.508228 15729.32876
## ENSG00000049249 0.9070671 13.276397 3.405481 91.53473
## ENSG00000078401 8.1636042 22.759537 10.216444 392.50094
## ENSG00000081051 4.5353357 5.689884 3.405481 32.22023
## ENSG00000090339 58.0522964 104.314544 57.893185 229.20297
## ENSG00000095587 53.5169607 42.674132 44.271259 131.81002
## SARS-CoV-2-rep2 SARS-CoV-2-rep3
## ENSG00000034510 16308.2614 12263.77561
## ENSG00000049249 103.0517 28.74168
## ENSG00000078401 266.1082 151.63712
## ENSG00000081051 27.3935 8.91983
## ENSG00000090339 177.4055 137.76182
## ENSG00000095587 139.5764 71.35864
head(heatmap.df.List[[2]])
## Mock-rep1 Mock-rep2 Mock-rep3 SARS-CoV-2-rep1
## ENSG00000237613 296.941593 294.510344 304.909944 74.09817
## ENSG00000177133 170.718714 143.940747 146.808491 33.01404
## ENSG00000097021 34.506974 35.038208 35.008179 129.85521
## ENSG00000049249 1.816157 14.204679 1.129296 96.10753
## ENSG00000284747 34.506974 40.720080 27.103106 172.40663
## ENSG00000284716 2.724235 5.681872 2.258592 13.93926
## SARS-CoV-2-rep2 SARS-CoV-2-rep3
## ENSG00000237613 129.79225 113.730562
## ENSG00000177133 48.17032 67.650075
## ENSG00000097021 149.86322 86.278357
## ENSG00000049249 104.36903 26.471769
## ENSG00000284747 148.52515 125.495792
## ENSG00000284716 21.40903 7.843487
head(heatmap.df.List[[3]])
## Mock-rep1 Mock-rep2 Mock-rep3 SARS-CoV-2-rep1 SARS-CoV-2-rep2
## ENSG00000237613 244.74613 244.27485 258.489470 64.00598 108.24497
## ENSG00000177133 163.16409 135.49620 139.875254 32.37084 45.43616
## ENSG00000158286 58.66574 33.39695 40.284073 24.27813 26.72715
## ENSG00000097021 32.08283 32.44275 29.094053 125.80486 134.97212
## ENSG00000049249 0.00000 14.31298 2.238004 93.43402 101.56318
## ENSG00000284747 32.99948 39.12214 22.380041 165.53271 144.32663
## SARS-CoV-2-rep3
## ENSG00000237613 86.81353
## ENSG00000177133 63.40314
## ENSG00000158286 13.65606
## ENSG00000097021 79.98550
## ENSG00000049249 21.45953
## ENSG00000284747 119.97825
dim(heatmap.df.List[[1]])
## [1] 211 6
dim(heatmap.df.List[[2]])
## [1] 1244 6
dim(heatmap.df.List[[3]])
## [1] 1240 6
pheatmap(heatmap.df.List[[3]],
annotation=HeatmapAnno,
scale="row",
show_rownames=F)
# Set a function creating a profiling heatmap
profile.heatmap.fn <- function(df, title) {
pheatmap(df,
annotation=HeatmapAnno,
scale="row",
show_rownames=F,
main=paste("Expression Profiling by", title, "(FDR < 0.1, absolute log2FoldChange > 1)"))
}
# Print the expression heatmaps
profile.heatmap.fn(heatmap.df.List[[Aligners[1]]], Aligners[1])
profile.heatmap.fn(heatmap.df.List[[Aligners[2]]], Aligners[2])
profile.heatmap.fn(heatmap.df.List[[Aligners[3]]], Aligners[3])
# Count number of NA genes
type=c("Zero Counts", "Outliers", "Total NA Genes")
# Create a data frame storing number of NA genes by type
NA.genes <- lfc.dataframe %>%
group_by(Alignment) %>%
summarize(zero=sum(is.na(log2FoldChange)),
outlier=sum(is.na(pvalue) & is.na(padj))) %>%
mutate(total=zero + outlier) %>%
gather(Type, Number, -Alignment) %>%
mutate(Type=factor(case_when(Type == "zero" ~ type[1],
Type == "outlier" ~ type[2],
Type == "total" ~ type[3]),
levels=type))
# Plot number of NA genes
ggplot(NA.genes,
aes(x=Type, y=Number, group=Alignment, fill=Alignment, label=Number)) +
geom_bar(stat="identity", position="dodge") +
theme_bw() +
geom_text(position=position_dodge(width=1), vjust=1.5) +
ggtitle("Number of NA Genes") +
ylab("Number of Genes")
# Create a new list having DE table with FDR below alpha
fdr.rank <- lfcList
lfc.rank <- lfcList
up.lfc.rank <- lfcList
down.lfc.rank <- lfcList
# Set a sorting genes with FDR below alpha
filter.fdr.fn <- function(df) {as.data.table(df[df$FDR == paste("<", alpha),])}
# Set a function creating a column assigning ranking
Ranking.fn <- function(x) {mutate(x, Rank=1:nrow(x))}
for (x in Aligners) {
rdf <- lfcList[[x]]
fdr.rank[[x]] <- filter.fdr.fn(rdf) %>% arrange(padj) %>% Ranking.fn()
lfc.rank[[x]] <- filter.fdr.fn(rdf) %>% arrange(desc(abs(log2FoldChange))) %>% Ranking.fn()
up.lfc.rank[[x]] <- filter.fdr.fn(rdf) %>% arrange(desc(log2FoldChange)) %>% Ranking.fn()
down.lfc.rank[[x]] <- filter.fdr.fn(rdf) %>% arrange(log2FoldChange) %>% Ranking.fn()
}
# Explore the ranking outputs
head(fdr.rank[[1]])
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1: ENSG00000118523 3787.2627 -3.482245 0.1816454 -19.17057 6.519597e-82
## 2: ENSG00000181104 2463.2229 -1.947074 0.1146592 -16.98140 1.127610e-64
## 3: ENSG00000211455 11906.2870 -1.962427 0.1203952 -16.29987 9.889571e-60
## 4: ENSG00000163814 1523.1071 -2.801335 0.1769621 -15.83014 1.927841e-56
## 5: ENSG00000138271 537.8105 -3.670375 0.2458998 -14.92630 2.222551e-50
## 6: ENSG00000164220 3016.6431 -1.822077 0.1254460 -14.52479 8.440025e-48
## padj FDR Alignment Rank
## 1: 2.382261e-78 < 0.1 Salmon 1
## 2: 2.060143e-61 < 0.1 Salmon 2
## 3: 1.204550e-56 < 0.1 Salmon 3
## 4: 1.761083e-53 < 0.1 Salmon 4
## 5: 1.624240e-47 < 0.1 Salmon 5
## 6: 5.139975e-45 < 0.1 Salmon 6
head(lfc.rank[[1]])
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1: ENSG00000244398 15.001200 -20.026364 3.909749 -5.122161 3.020543e-07
## 2: ENSG00000198796 51.198466 -8.170872 1.286827 -6.349630 2.158341e-10
## 3: ENSG00000248167 14.721298 -7.330936 1.346565 -5.444174 5.204631e-08
## 4: ENSG00000178776 7.008791 -6.263361 1.590850 -3.937117 8.246658e-05
## 5: ENSG00000263647 6.054767 6.059712 1.580750 3.833440 1.263635e-04
## 6: ENSG00000137673 135.525339 -5.352827 1.108962 -4.826882 1.386874e-06
## padj FDR Alignment Rank
## 1: 1.031501e-05 < 0.1 Salmon 1
## 2: 1.232278e-08 < 0.1 Salmon 2
## 3: 2.113080e-06 < 0.1 Salmon 3
## 4: 1.382261e-03 < 0.1 Salmon 4
## 5: 2.016298e-03 < 0.1 Salmon 5
## 6: 3.959091e-05 < 0.1 Salmon 6
head(up.lfc.rank[[1]])
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1: ENSG00000263647 6.054767 6.059712 1.580750 3.833440 1.263635e-04
## 2: ENSG00000271723 19.814732 4.755253 0.948979 5.010914 5.417202e-07
## 3: ENSG00000261408 5.238435 4.080830 1.528257 2.670251 7.579457e-03
## 4: ENSG00000251177 8.978501 3.765180 1.076503 3.497602 4.694606e-04
## 5: ENSG00000240418 5.611221 3.371816 1.366647 2.467217 1.361678e-02
## 6: ENSG00000144649 6.843725 3.304941 1.172805 2.817981 4.832668e-03
## padj FDR Alignment Rank
## 1: 2.016298e-03 < 0.1 Salmon 1
## 2: 1.783284e-05 < 0.1 Salmon 2
## 3: 5.663668e-02 < 0.1 Salmon 3
## 4: 5.977035e-03 < 0.1 Salmon 4
## 5: 8.737443e-02 < 0.1 Salmon 5
## 6: 3.986133e-02 < 0.1 Salmon 6
head(down.lfc.rank[[1]])
## GENEID baseMean log2FoldChange lfcSE stat pvalue
## 1: ENSG00000244398 15.001200 -20.026364 3.9097492 -5.122161 3.020543e-07
## 2: ENSG00000198796 51.198466 -8.170872 1.2868266 -6.349630 2.158341e-10
## 3: ENSG00000248167 14.721298 -7.330936 1.3465654 -5.444174 5.204631e-08
## 4: ENSG00000178776 7.008791 -6.263361 1.5908499 -3.937117 8.246658e-05
## 5: ENSG00000137673 135.525339 -5.352827 1.1089617 -4.826882 1.386874e-06
## 6: ENSG00000078401 141.897648 -4.295995 0.3983792 -10.783684 4.110862e-27
## padj FDR Alignment Rank
## 1: 1.031501e-05 < 0.1 Salmon 1
## 2: 1.232278e-08 < 0.1 Salmon 2
## 3: 2.113080e-06 < 0.1 Salmon 3
## 4: 1.382261e-03 < 0.1 Salmon 4
## 5: 3.959091e-05 < 0.1 Salmon 5
## 6: 1.502109e-24 < 0.1 Salmon 6
# Set a function rebuilding DE tables with gene ranks
rankdiff.fn <- function(List){
# Select columns and join the data frames by gene
full_join(List[[Aligners[2]]][, .(GENEID, Alignment, Rank, baseMean)],
List[[Aligners[3]]][, .(GENEID, Alignment, Rank, baseMean)],
by="GENEID") %>%
# Add columns assining gene expression levels, rank differences, and mean ranks
mutate(logMeanExpression=log(baseMean.x+baseMean.y/2),
RankDiff=Rank.x-Rank.y,
MeanRank=(Rank.x+Rank.y)/2)
}
# Calculate rank difference by ranking type
fdr.rankdiff <- rankdiff.fn(fdr.rank)
lfc.rankdiff <- rankdiff.fn(lfc.rank)
up.lfc.rankdiff <- rankdiff.fn(up.lfc.rank)
down.lfc.rankdiff <- rankdiff.fn(down.lfc.rank)
# Explore the calculated rank differences
head(fdr.rankdiff)
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y baseMean.y
## 1: ENSG00000261008 STAR 1 2036.3102 HISAT2 1 1977.5615
## 2: ENSG00000169908 STAR 2 2336.0090 HISAT2 2 2262.3133
## 3: ENSG00000118523 STAR 3 3921.7264 HISAT2 3 3820.8621
## 4: ENSG00000136943 STAR 4 11817.9679 HISAT2 4 11418.8145
## 5: ENSG00000109205 STAR 5 382.4466 HISAT2 5 374.5982
## 6: ENSG00000181104 STAR 6 2551.1683 HISAT2 6 2478.0294
## logMeanExpression RankDiff MeanRank
## 1: 8.014696 0 1
## 2: 8.151093 0 2
## 3: 8.671142 0 3
## 4: 9.771519 0 4
## 5: 6.345190 0 5
## 6: 8.240170 0 6
head(lfc.rankdiff)
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y baseMean.y
## 1: ENSG00000259895 STAR 1 200.612721 <NA> NA NA
## 2: ENSG00000138675 STAR 2 41.175215 HISAT2 3 39.989193
## 3: ENSG00000178776 STAR 3 8.745942 HISAT2 2 8.182662
## 4: ENSG00000198796 STAR 4 37.920036 HISAT2 1 37.316116
## 5: ENSG00000187689 STAR 5 64.800783 HISAT2 5 62.192291
## 6: ENSG00000122641 STAR 6 128.103619 HISAT2 4 124.739803
## logMeanExpression RankDiff MeanRank
## 1: NA NA NA
## 2: 4.113654 -1 2.5
## 3: 2.552353 1 2.5
## 4: 4.035622 3 2.5
## 5: 4.563274 0 5.0
## 6: 5.249513 2 5.0
head(up.lfc.rankdiff)
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y baseMean.y
## 1: ENSG00000259895 STAR 1 200.612721 <NA> NA NA
## 2: ENSG00000103184 STAR 2 8.588905 HISAT2 8 7.399501
## 3: ENSG00000105641 STAR 3 14.872126 HISAT2 1 13.620723
## 4: ENSG00000256654 STAR 4 15.529629 HISAT2 7 14.733459
## 5: ENSG00000251177 STAR 5 9.858227 HISAT2 5 9.732927
## 6: ENSG00000144649 STAR 6 7.421817 HISAT2 2 6.526874
## logMeanExpression RankDiff MeanRank
## 1: NA NA NA
## 2: 2.508677 -6 5.0
## 3: 3.076505 2 2.0
## 4: 3.130978 -3 5.5
## 5: 2.689526 0 5.0
## 6: 2.368865 4 4.0
head(down.lfc.rankdiff)
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y baseMean.y
## 1: ENSG00000138675 STAR 1 41.175215 HISAT2 3 39.989193
## 2: ENSG00000178776 STAR 2 8.745942 HISAT2 2 8.182662
## 3: ENSG00000198796 STAR 3 37.920036 HISAT2 1 37.316116
## 4: ENSG00000187689 STAR 4 64.800783 HISAT2 5 62.192291
## 5: ENSG00000122641 STAR 5 128.103619 HISAT2 4 124.739803
## 6: ENSG00000137673 STAR 6 143.085583 HISAT2 7 137.747910
## logMeanExpression RankDiff MeanRank
## 1: 4.113654 -2 2.0
## 2: 2.552353 0 2.0
## 3: 4.035622 2 2.0
## 4: 4.563274 -1 4.5
## 5: 5.249513 1 4.5
## 6: 5.356395 -1 6.5
# Set a function plotting DEG rankings
ranking.plot.fn <- function(df, rankedby) {
ggplot(df,
aes(x=Rank.x, y=Rank.y, color=logMeanExpression)) + geom_point(alpha=0.5) + theme_bw() + theme(strip.text.x=element_text(size=10)) + xlab("Ranking in STAR") + ylab("Ranking in HISAT2") + geom_abline(slope=1, color="black", size=0.5) + ggtitle(paste("Gene Ranking by", rankedby)) + scale_color_gradient(low="blue", high="red")
}
# Plot rankings by ranking type
ranking.plot.fn(fdr.rankdiff, "FDR")
ranking.plot.fn(lfc.rankdiff, "Log2FoldChange")
ranking.plot.fn(up.lfc.rankdiff, "Log2FoldChange (Increase)")
ranking.plot.fn(down.lfc.rankdiff, "Log2FoldChange (Decrease)")
# Set a function plotting DEG rank difference
rankdiff.plot.fn <- function(df, rankedby, ylim) {
ggplot(df, aes(x=logMeanExpression, y=RankDiff, color=MeanRank)) +
geom_point(alpha=0.5) +
theme_bw() +
ylab("Rank Difference (STAR - HISAT2)") +
ggtitle(paste("Rank Difference (STAR - HISAT2)\nin", rankedby)) +
geom_hline(yintercept=0, color="black", size=0.5) + scale_color_gradient(low="blue", high="red") +
ylim(-ylim, ylim)
}
# Display the plots in the same y-scale
rankdiff.plot.fn(fdr.rankdiff, "FDR", 2500)
rankdiff.plot.fn(lfc.rankdiff, "Log2FoldChange", 2500)
rankdiff.plot.fn(up.lfc.rankdiff, "Log2FoldChange (Increase)", 2500)
rankdiff.plot.fn(down.lfc.rankdiff, "Log2FoldChange (Decrease)", 2500)
# Display the plots in free y-scale
rankdiff.plot.fn(lfc.rankdiff, "Log2FoldChange (free y-scale)", 1500)
rankdiff.plot.fn(up.lfc.rankdiff, "Log2FoldChange (Increase, free y-scale)", 750)
rankdiff.plot.fn(down.lfc.rankdiff, "Log2FoldChange (Decrease, free y-scale)", 600)
# Create a list storing rankdiff data frames
rankList <- list(fdr.rankdiff,
lfc.rankdiff,
up.lfc.rankdiff,
down.lfc.rankdiff)
# Assine result column as a factor with levels
rankdiff.levels <- c("FDR",
"log2FoldChange",
"log2FoldChange.Increase",
"log2FoldChange.Decrease")
# Name the list
names(rankList) <- rankdiff.levels
# Create a new data frame storing rank difference by result type
rankdiff.dist <- data.frame(FDR=abs(rankList[[1]]$RankDiff),
log2FoldChange=abs(rankList[[2]]$RankDiff),
log2FoldChange.Increase=abs(rankList[[3]]$RankDiff),
log2FoldChange.Decrease=abs(rankList[[4]]$RankDiff)) %>% gather(Result, RankDiff)
rankdiff.dist$Result <- factor(rankdiff.dist$Result, levels=rankdiff.levels)
# Plot distribution of absolute rank difference
ggplot(rankdiff.dist,
aes(x=Result, y=RankDiff, color=Result)) +
geom_jitter(alpha=0.5) +
geom_boxplot(alpha=0.5, fill="grey", color="black") +
theme_bw() +
theme(axis.text.x=element_text(angle=45, hjust=1)) +
ggtitle("Distribution of Absolute Rank Difference without Shrinkage \n(STAR - HISAT2)") +
ylab("Absolute Rank Difference (STAR - HISAT2)")
# Create a data frame storing the number of transcripts by gene id
AnnoDb.ntrans <- AnnoDb %>%
group_by(ENSEMBL) %>%
summarize(num.trans=n())
# Set a function adding the number of transcripts by gene id
add.ntrans.fn <- function(df) {
left_join(df, AnnoDb.ntrans, by=c("GENEID"="ENSEMBL"))}
# Add a column indicating the number of transcripts by gene id to every rankdiff data frame
for (x in rankdiff.levels) {
rankList[[x]] <- add.ntrans.fn(rankList[[x]])
}
# Explore the edited data frames
summary(rankList)
## Length Class Mode
## FDR 11 data.table list
## log2FoldChange 11 data.table list
## log2FoldChange.Increase 11 data.table list
## log2FoldChange.Decrease 11 data.table list
head(rankList[[1]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y baseMean.y
## 1: ENSG00000261008 STAR 1 2036.3102 HISAT2 1 1977.5615
## 2: ENSG00000169908 STAR 2 2336.0090 HISAT2 2 2262.3133
## 3: ENSG00000118523 STAR 3 3921.7264 HISAT2 3 3820.8621
## 4: ENSG00000136943 STAR 4 11817.9679 HISAT2 4 11418.8145
## 5: ENSG00000109205 STAR 5 382.4466 HISAT2 5 374.5982
## 6: ENSG00000181104 STAR 6 2551.1683 HISAT2 6 2478.0294
## logMeanExpression RankDiff MeanRank num.trans
## 1: 8.014696 0 1 NA
## 2: 8.151093 0 2 NA
## 3: 8.671142 0 3 1
## 4: 9.771519 0 4 NA
## 5: 6.345190 0 5 NA
## 6: 8.240170 0 6 2
head(rankList[[2]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y baseMean.y
## 1: ENSG00000259895 STAR 1 200.612721 <NA> NA NA
## 2: ENSG00000138675 STAR 2 41.175215 HISAT2 3 39.989193
## 3: ENSG00000178776 STAR 3 8.745942 HISAT2 2 8.182662
## 4: ENSG00000198796 STAR 4 37.920036 HISAT2 1 37.316116
## 5: ENSG00000187689 STAR 5 64.800783 HISAT2 5 62.192291
## 6: ENSG00000122641 STAR 6 128.103619 HISAT2 4 124.739803
## logMeanExpression RankDiff MeanRank num.trans
## 1: NA NA NA NA
## 2: 4.113654 -1 2.5 NA
## 3: 2.552353 1 2.5 3
## 4: 4.035622 3 2.5 6
## 5: 4.563274 0 5.0 NA
## 6: 5.249513 2 5.0 NA
head(rankList[[3]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y baseMean.y
## 1: ENSG00000259895 STAR 1 200.612721 <NA> NA NA
## 2: ENSG00000103184 STAR 2 8.588905 HISAT2 8 7.399501
## 3: ENSG00000105641 STAR 3 14.872126 HISAT2 1 13.620723
## 4: ENSG00000256654 STAR 4 15.529629 HISAT2 7 14.733459
## 5: ENSG00000251177 STAR 5 9.858227 HISAT2 5 9.732927
## 6: ENSG00000144649 STAR 6 7.421817 HISAT2 2 6.526874
## logMeanExpression RankDiff MeanRank num.trans
## 1: NA NA NA NA
## 2: 2.508677 -6 5.0 NA
## 3: 3.076505 2 2.0 2
## 4: 3.130978 -3 5.5 NA
## 5: 2.689526 0 5.0 1
## 6: 2.368865 4 4.0 5
head(rankList[[4]])
## GENEID Alignment.x Rank.x baseMean.x Alignment.y Rank.y baseMean.y
## 1: ENSG00000138675 STAR 1 41.175215 HISAT2 3 39.989193
## 2: ENSG00000178776 STAR 2 8.745942 HISAT2 2 8.182662
## 3: ENSG00000198796 STAR 3 37.920036 HISAT2 1 37.316116
## 4: ENSG00000187689 STAR 4 64.800783 HISAT2 5 62.192291
## 5: ENSG00000122641 STAR 5 128.103619 HISAT2 4 124.739803
## 6: ENSG00000137673 STAR 6 143.085583 HISAT2 7 137.747910
## logMeanExpression RankDiff MeanRank num.trans
## 1: 4.113654 -2 2.0 NA
## 2: 2.552353 0 2.0 3
## 3: 4.035622 2 2.0 6
## 4: 4.563274 -1 4.5 NA
## 5: 5.249513 1 4.5 NA
## 6: 5.356395 -1 6.5 3
# Set a function plotting rank difference vs number of transcripts
rank.ntrans.plot.fn <- function(df, title) {
ggplot(df, aes(x=num.trans, y=abs(RankDiff), color=MeanRank)) +
geom_jitter(alpha=0.5) +
theme_bw() +
ggtitle(paste("Rank Difference vs Number of Alternative Transcripts \nin", title)) +
xlab("Number of Alternative Transcripts") +
ylab("Absolute Rank Difference \n(STAR - HISAT2)") + scale_color_gradient(low="blue", high="red")
}
# Print the plots
rank.ntrans.plot.fn(rankList[[1]], "FDR")
rank.ntrans.plot.fn(rankList[[2]], "log2FoldChange")
rank.ntrans.plot.fn(rankList[[3]], "log2FoldChange (Increase)")
rank.ntrans.plot.fn(rankList[[4]], "log2FoldChange (Decrease)")
# Initialize a list storing rankdiff genes
large.rankdiff <- rankList
# Assign a vector storing minimum (thresholds) rankdiff for filtering large rankdiff genes
rankdiff.thr <- c(500, 250, 250, 250)
names(rankdiff.thr) <- rankdiff.levels
for (x in rankdiff.levels) {
# Filter out observations below the rankdiff thresholds
large.rankdiff[[x]] <- subset(rankList[[x]],
abs(RankDiff) > rankdiff.thr[x])
}
# Explore the filtered genes
summary(large.rankdiff)
## Length Class Mode
## FDR 11 data.table list
## log2FoldChange 11 data.table list
## log2FoldChange.Increase 11 data.table list
## log2FoldChange.Decrease 11 data.table list
dim(large.rankdiff[[rankdiff.levels[1]]])
## [1] 195 11
dim(large.rankdiff[[rankdiff.levels[2]]])
## [1] 106 11
dim(large.rankdiff[[rankdiff.levels[3]]])
## [1] 14 11
dim(large.rankdiff[[rankdiff.levels[4]]])
## [1] 20 11
# Generate a data frame storing upset input variables
upset.dataframe <- subset(lfc.dataframe, !is.na(padj)) %>%
mutate(Up=ifelse(FDR == paste("<", alpha) & log2FoldChange > 0, GENEID, ""),
Down=ifelse(FDR == paste("<", alpha) & log2FoldChange < 0, GENEID, ""),
Unchanged=ifelse(FDR == paste(">", alpha), GENEID, ""),
Salmon=ifelse(Alignment == "Salmon", GENEID, ""),
STAR=ifelse(Alignment == "STAR", GENEID, ""),
HISAT2=ifelse(Alignment == "HISAT2", GENEID, ""))
# Generate a list input
upset.input <- list(Up=upset.dataframe$Up,
Down=upset.dataframe$Down,
Unchanged=upset.dataframe$Unchanged,
Salmon=upset.dataframe$Salmon,
STAR=upset.dataframe$STAR,
HISAT2=upset.dataframe$HISAT2)
# Create an upset plot
upset(fromList(upset.input),
sets=names(upset.input), # What group to display
sets.x.label="Number of Genes per Group",
order.by="freq",
point.size=3,
sets.bar.color=c("red", "red", "blue", "red", "blue", "blue"),
text.scale = 1.5, number.angles=30)
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.1 LTS
##
## Matrix products: default
## BLAS/LAPACK: /home/mira/miniconda3/envs/r/lib/libopenblasp-r0.3.10.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] AnnotationDbi_1.52.0 UpSetR_1.4.0
## [3] DESeq2_1.30.0 SummarizedExperiment_1.20.0
## [5] Biobase_2.50.0 MatrixGenerics_1.2.0
## [7] matrixStats_0.57.0 GenomicRanges_1.42.0
## [9] GenomeInfoDb_1.26.2 IRanges_2.24.0
## [11] S4Vectors_0.28.1 Rsubread_2.4.0
## [13] tximport_1.18.0 AnnotationHub_2.22.0
## [15] BiocFileCache_1.14.0 dbplyr_2.0.0
## [17] BiocGenerics_0.36.0 pheatmap_1.0.12
## [19] rmarkdown_2.5 forcats_0.5.0
## [21] stringr_1.4.0 dplyr_1.0.2
## [23] purrr_0.3.4 readr_1.4.0
## [25] tidyr_1.1.2 tibble_3.0.4
## [27] ggplot2_3.3.2 tidyverse_1.3.0
## [29] data.table_1.13.4
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-0 ellipsis_0.3.1
## [3] XVector_0.30.0 fs_1.5.0
## [5] rstudioapi_0.13 farver_2.0.3
## [7] bit64_4.0.5 interactiveDisplayBase_1.28.0
## [9] fansi_0.4.1 lubridate_1.7.9.2
## [11] xml2_1.3.2 splines_4.0.3
## [13] geneplotter_1.68.0 knitr_1.30
## [15] jsonlite_1.7.2 broom_0.7.2
## [17] annotate_1.68.0 shiny_1.5.0
## [19] BiocManager_1.30.10 compiler_4.0.3
## [21] httr_1.4.2 backports_1.2.1
## [23] assertthat_0.2.1 Matrix_1.2-18
## [25] fastmap_1.0.1 cli_2.2.0
## [27] later_1.1.0.1 htmltools_0.5.0
## [29] tools_4.0.3 gtable_0.3.0
## [31] glue_1.4.2 GenomeInfoDbData_1.2.4
## [33] rappdirs_0.3.1 Rcpp_1.0.5
## [35] cellranger_1.1.0 vctrs_0.3.5
## [37] xfun_0.19 ps_1.5.0
## [39] rvest_0.3.6 mime_0.9
## [41] lifecycle_0.2.0 XML_3.99-0.5
## [43] zlibbioc_1.36.0 scales_1.1.1
## [45] hms_0.5.3 promises_1.1.1
## [47] RColorBrewer_1.1-2 yaml_2.2.1
## [49] curl_4.3 memoise_1.1.0
## [51] gridExtra_2.3 stringi_1.5.3
## [53] RSQLite_2.2.1 BiocVersion_3.12.0
## [55] genefilter_1.72.0 BiocParallel_1.24.1
## [57] rlang_0.4.9 pkgconfig_2.0.3
## [59] bitops_1.0-6 evaluate_0.14
## [61] lattice_0.20-41 labeling_0.4.2
## [63] bit_4.0.4 tidyselect_1.1.0
## [65] plyr_1.8.6 magrittr_2.0.1
## [67] R6_2.5.0 generics_0.1.0
## [69] DelayedArray_0.16.0 DBI_1.1.0
## [71] pillar_1.4.7 haven_2.3.1
## [73] withr_2.3.0 survival_3.2-7
## [75] RCurl_1.98-1.2 modelr_0.1.8
## [77] crayon_1.3.4 locfit_1.5-9.4
## [79] grid_4.0.3 readxl_1.3.1
## [81] blob_1.2.1 reprex_0.3.0
## [83] digest_0.6.27 xtable_1.8-4
## [85] httpuv_1.5.4 munsell_0.5.0